{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pandas-时间及算数运算"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "内容介绍:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import datetime\n",
    "import pandas.tseries.offsets as offset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.时期Period_Index"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用period_range生成时间范围的PeriodIndex\n",
    "\n",
    "* 使用语法同date_range()基本相同\n",
    "* 语法说明:<br>\n",
    "period_range(start=None, end=None, periods=None, freq=None, name=None) -> pandas.core.indexes.period.PeriodIndex\n",
    "* 时间范围可以作为轴索引\n",
    "* 通过时间数据或者字符串创建"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PeriodIndex(['2000-01', '2000-02', '2000-03', '2000-04', '2000-05', '2000-06'], dtype='period[M]', freq='M')"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prng = pd.period_range('2000-1-1','2000-6-30',freq='M')\n",
    "prng"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2000-01    0.856130\n",
       "2000-02    0.359939\n",
       "2000-03   -0.387629\n",
       "2000-04    0.348737\n",
       "2000-05    1.739532\n",
       "2000-06    0.832387\n",
       "Freq: M, dtype: float64"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#(1)时间范围作为序列索引\n",
    "ts = pd.Series(np.random.randn(len(prng)),index=prng)\n",
    "ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PeriodIndex(['2018Q3', '2018Q2', '2018Q1'], dtype='period[Q-DEC]', freq='Q-DEC')"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#(2)字符串数组，也可以使用Period_Index\n",
    "values = ['2018Q3','2018Q2','2018Q1']\n",
    "index2 = pd.PeriodIndex(values, freq='Q-DEC')\n",
    "index2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PeriodIndex(['2017Q1', '2017Q2', '2017Q3', '2017Q4', '2018Q1', '2018Q2',\n",
       "             '2018Q3', '2018Q4'],\n",
       "            dtype='period[Q-DEC]', freq='Q-DEC')"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (3)通过表示时间信息的数组创建PeriodIndex\n",
    "# 结合年度和季度信息为参数生成时间范围\n",
    "year = np.array(list(('2017',)*4+('2018',)*4)).astype(np.int32)\n",
    "quarter = np.array(list(('1','2','3','4')*2)).astype(np.int32)\n",
    "index3 = pd.PeriodIndex(year=year,quarter=quarter,freq='Q-DEC')\n",
    "index3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>year</th>\n",
       "      <th>quarter</th>\n",
       "      <th>realgdp</th>\n",
       "      <th>realcons</th>\n",
       "      <th>realinv</th>\n",
       "      <th>realgovt</th>\n",
       "      <th>realdpi</th>\n",
       "      <th>cpi</th>\n",
       "      <th>m1</th>\n",
       "      <th>tbilrate</th>\n",
       "      <th>unemp</th>\n",
       "      <th>pop</th>\n",
       "      <th>infl</th>\n",
       "      <th>realint</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1959.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2710.349</td>\n",
       "      <td>1707.4</td>\n",
       "      <td>286.898</td>\n",
       "      <td>470.045</td>\n",
       "      <td>1886.9</td>\n",
       "      <td>28.98</td>\n",
       "      <td>139.7</td>\n",
       "      <td>2.82</td>\n",
       "      <td>5.8</td>\n",
       "      <td>177.146</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1959.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2778.801</td>\n",
       "      <td>1733.7</td>\n",
       "      <td>310.859</td>\n",
       "      <td>481.301</td>\n",
       "      <td>1919.7</td>\n",
       "      <td>29.15</td>\n",
       "      <td>141.7</td>\n",
       "      <td>3.08</td>\n",
       "      <td>5.1</td>\n",
       "      <td>177.830</td>\n",
       "      <td>2.34</td>\n",
       "      <td>0.74</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1959.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2775.488</td>\n",
       "      <td>1751.8</td>\n",
       "      <td>289.226</td>\n",
       "      <td>491.260</td>\n",
       "      <td>1916.4</td>\n",
       "      <td>29.35</td>\n",
       "      <td>140.5</td>\n",
       "      <td>3.82</td>\n",
       "      <td>5.3</td>\n",
       "      <td>178.657</td>\n",
       "      <td>2.74</td>\n",
       "      <td>1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1959.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2785.204</td>\n",
       "      <td>1753.7</td>\n",
       "      <td>299.356</td>\n",
       "      <td>484.052</td>\n",
       "      <td>1931.3</td>\n",
       "      <td>29.37</td>\n",
       "      <td>140.0</td>\n",
       "      <td>4.33</td>\n",
       "      <td>5.6</td>\n",
       "      <td>179.386</td>\n",
       "      <td>0.27</td>\n",
       "      <td>4.06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1960.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2847.699</td>\n",
       "      <td>1770.5</td>\n",
       "      <td>331.722</td>\n",
       "      <td>462.199</td>\n",
       "      <td>1955.5</td>\n",
       "      <td>29.54</td>\n",
       "      <td>139.6</td>\n",
       "      <td>3.50</td>\n",
       "      <td>5.2</td>\n",
       "      <td>180.007</td>\n",
       "      <td>2.31</td>\n",
       "      <td>1.19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     year  quarter   realgdp  realcons  realinv  realgovt  realdpi    cpi  \\\n",
       "0  1959.0      1.0  2710.349    1707.4  286.898   470.045   1886.9  28.98   \n",
       "1  1959.0      2.0  2778.801    1733.7  310.859   481.301   1919.7  29.15   \n",
       "2  1959.0      3.0  2775.488    1751.8  289.226   491.260   1916.4  29.35   \n",
       "3  1959.0      4.0  2785.204    1753.7  299.356   484.052   1931.3  29.37   \n",
       "4  1960.0      1.0  2847.699    1770.5  331.722   462.199   1955.5  29.54   \n",
       "\n",
       "      m1  tbilrate  unemp      pop  infl  realint  \n",
       "0  139.7      2.82    5.8  177.146  0.00     0.00  \n",
       "1  141.7      3.08    5.1  177.830  2.34     0.74  \n",
       "2  140.5      3.82    5.3  178.657  2.74     1.09  \n",
       "3  140.0      4.33    5.6  179.386  0.27     4.06  \n",
       "4  139.6      3.50    5.2  180.007  2.31     1.19  "
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# （4）通过dataframe中的列数据结合生成期间\n",
    "df = pd.read_csv('macrodata.csv')  # 获取美国部分年度的宏观经济数据，包含年份和季度的数据标签列\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PeriodIndex(['1959Q1', '1959Q2', '1959Q3', '1959Q4', '1960Q1', '1960Q2',\n",
       "             '1960Q3', '1960Q4', '1961Q1', '1961Q2',\n",
       "             ...\n",
       "             '2007Q2', '2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3',\n",
       "             '2008Q4', '2009Q1', '2009Q2', '2009Q3'],\n",
       "            dtype='period[Q-DEC]', length=203, freq='Q-DEC')"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "index4 = pd.PeriodIndex(year=df.year,quarter=df.quarter,freq='Q-DEC')\n",
    "index4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.index=index4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1959Q1     28.980\n",
       "1959Q2     29.150\n",
       "1959Q3     29.350\n",
       "1959Q4     29.370\n",
       "1960Q1     29.540\n",
       "           ...   \n",
       "2008Q3    216.889\n",
       "2008Q4    212.174\n",
       "2009Q1    212.671\n",
       "2009Q2    214.469\n",
       "2009Q3    216.385\n",
       "Freq: Q-DEC, Name: cpi, Length: 203, dtype: float64"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.cpi"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.时期的频率转换一般讲解\n",
    "\n",
    "* 通过period.asfreq函数可以转换频率\n",
    "* 从高频率转换为低频率时，超时期(较大的时期)是由子时期(较小的时期)的位置决定的\n",
    "* PeriodIndex和TimeSeries的频率抓换方式相同"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "asfreq(...)\n",
    "    Convert Period to desired frequency, at the start or end of the interval.\n",
    "    \n",
    "    Parameters\n",
    "    ----------\n",
    "    freq : str\n",
    "        The desired frequency.\n",
    "    how : {'E', 'S', 'end', 'start'}, default 'end'\n",
    "        Start or end of the timespan.\n",
    "    \n",
    "    Returns\n",
    "    -------\n",
    "    resampled : Period"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Period('2018', 'A-DEC')"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 创建一个时期对象\n",
    "# 以12月结束的2018年度每个月的最后一个日历日\n",
    "p = pd.Period(2018,freq='A-DEC')\n",
    "p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Period('2018-01', 'M')"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#转换成年初的一个月度时期\n",
    "p.asfreq('M',how='S')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Period('2018-12', 'M')"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#转换成年末的一个月度时期\n",
    "p.asfreq('M')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**频率转换涉及重要的概念是，财政年度**<br>\n",
    "**即美国(或部分国家)的财政年度是从公历年的7月到第二年的6月结束**<br>\n",
    "**2018美国财年为公历年度的2017.7-2018.6月**\n",
    "\n",
    "* A-JUN:表明财政年度的结尾是JUN，即6月，类似于美国的财政年度2017.7-2018.6\n",
    "* A-DEC:表明财政年度的结尾是DEC，即12月,财政年度同公历年度2017.1-2017.12"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Period('2018', 'A-JUN')"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#不以12月结束的财政年度，月度时期的归属情况\n",
    "p2 = pd.Period('2018',freq='A-JUN')#以6月结束2018年度的每个月最后一个日历日，从2017-7到2018-6为一个时期\n",
    "p2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Period('2017-07', 'M')"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#2017-7这个时期开始的\n",
    "p2.asfreq('M',how='S')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Period('2018-06', 'M')"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#2018-6这个时期的结束。asfreq函数默认的how='E'或'end'即时期的最后一个月\n",
    "p2.asfreq('M')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Period('2018', 'A-JUN')"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#从高频转换为低频时，超时期(较大的时期)是由子时期（较小的时期）的位置决定的\n",
    "p21=pd.Period('201803','M')\n",
    "#2018-3返回2018财年\n",
    "p21.asfreq('A-JUN')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Period('2019', 'A-JUN')"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#从高频转换为低频时，超时期(较大的时期)是由子时期（较小的时期）的位置决定的\n",
    "p22=pd.Period('201809','M')\n",
    "#2018-9返回2019财年\n",
    "p22.asfreq('A-JUN')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2016    0.751996\n",
       "2017    0.438701\n",
       "2018   -1.175667\n",
       "2019   -0.441644\n",
       "Freq: A-DEC, dtype: float64"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#对于PeiodIndex和TimeSeries的频率转换方式相同\n",
    "rng = pd.period_range('2016','2019',freq='A-DEC')\n",
    "ts = pd.Series(np.random.randn(len(rng)),index=rng)\n",
    "ts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2016-01    0.751996\n",
       "2017-01    0.438701\n",
       "2018-01   -1.175667\n",
       "2019-01   -0.441644\n",
       "Freq: M, dtype: float64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#频率转换成一个月度时期索引\n",
    "ts.asfreq('M',how='start')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2016-12    0.751996\n",
       "2017-12    0.438701\n",
       "2018-12   -1.175667\n",
       "2019-12   -0.441644\n",
       "Freq: M, dtype: float64"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#默认how的方式是end,为日历年度的最后一个月\n",
    "ts.asfreq('M')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Help on method_descriptor:\n",
      "\n",
      "asfreq(...)\n",
      "    Convert Period to desired frequency, at the start or end of the interval.\n",
      "    \n",
      "    Parameters\n",
      "    ----------\n",
      "    freq : str\n",
      "        The desired frequency.\n",
      "    how : {'E', 'S', 'end', 'start'}, default 'end'\n",
      "        Start or end of the timespan.\n",
      "    \n",
      "    Returns\n",
      "    -------\n",
      "    resampled : Period\n",
      "\n"
     ]
    }
   ],
   "source": [
    "help(pd.Period.asfreq)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
